/*

DATE: June 18 2018

PROJECT: Police killings and mental health of black Americans, replication archive

PURPOSE: This file contains code to set up the datasets and present estimates for Fig 2 and ETable 5. Of note, the core estimates
(effect of police killings of unarmed black Americans on mental health of black Americans) come from the code in "Analysis File 1".


*/

*FILEPATHS
*Replace [FILE PATH] with the local root of the replication archive before running.
*The working global is the folder every .dta file in the sections below is read from.
*The source global is defined here but is not referenced in the sections visible in this file.
global working "[FILE PATH]/Working Datasets/"
global source "[FILE PATH]/Source Datasets and Do Files/"


****GENERATE DATA TO CREATE FIG 2 and ETABLE 5*****

*****(1) Impact of police killings of UNARMED BLACK AMERICANS on mental health among WHITE AMERICANS
***SETUP
*Steps in this section:
*  1. load the shootings file, compute state totals, and expand to one row per killing
*  2. pair every killing with every BRFSS respondent in the same state (joinby on stfip)
*  3. bin each killing-respondent pair by the number of days between interview and killing
*  4. collapse to one row per respondent, counting killings in each quarterly/monthly bin
*  5. regress menthlth on exposure in the immediate post-period bin (t_5 / any_5)

use "$working/black_unarmed_shootings.dta", clear

*State-level total of killings and average per month.
*This must be computed BEFORE expand: once each record is duplicated count times,
*total(count) would add every record's count repeatedly (sum of count squared) and
*overstate the total wherever count > 1.
bys stfip: egen total_shot = total(count)
gen shot_per_month = total_shot/45	/* NOTE(review): 45 is presumably the number of months covered by the shootings data - confirm */

*One row per killing
expand count

*histogram of shootings per month (one observation per killing, as before)
hist shot_per_month

*unmatched(master) keeps shooting records that have no respondent in the same state;
*respondents in states that do not appear in the shootings file are not retained.
joinby stfip using "$working/brfss_white_sample_June182018.dta", unmatched(master) _merge(_BRF)

*Generate time from shooting estimate for every shooting-individual pair in the data
*dist > 0 means the killing happened BEFORE the interview (respondent was exposed)
*Combine these into 3 month intervals
gen dist = int_date - shooting_date
gen timing = .

replace timing = 1 if dist>=0&dist<=90
replace timing = 2 if dist>90&dist<=180
replace timing = 3 if dist>180&dist<=270
*Upper bound is inclusive so that day 360 is binned, mirroring dist>=-360 in the pre-period
replace timing = 4 if dist>270&dist<=360

*Pre-period bins; strict lower-side inequalities leave no gap between adjacent bins
*(identical to the <=-91 / <=-181 / <=-271 form whenever dist is an integer number of days)
replace timing = -1 if dist<0&dist>=-90
replace timing = -2 if dist<-90&dist>=-180
replace timing = -3 if dist<-180&dist>=-270
replace timing = -4 if dist<-270&dist>=-360

*Quarterly indicators t_1-t_8 for timing = -4,...,-1,1,...,4
*negative numbers reflect a shooting date AFTER interview; t_5 is the immediate post period
*The mapping is written out explicitly (instead of tab, gen()) so that the numbering does not
*depend on which bins happen to be populated in the data.
local k = 0
foreach q in -4 -3 -2 -1 1 2 3 4 {
	local ++k
	gen byte t_`k' = (timing==`q') if !missing(timing)
	}

*Denote whether shooting occurred in any of +/- 6 monthly bins around interview date
gen monthdist = .
replace monthdist = 1 if dist>=0&dist<=30
replace monthdist = 2 if dist>30&dist<=60
replace monthdist = 3 if dist>60&dist<=90
replace monthdist = 4 if dist>90&dist<=120
replace monthdist = 5 if dist>120&dist<=150
replace monthdist = 6 if dist>150&dist<=180

replace monthdist = -1 if dist<0&dist>=-30
replace monthdist = -2 if dist<-30&dist>=-60
replace monthdist = -3 if dist<-60&dist>=-90
replace monthdist = -4 if dist<-90&dist>=-120
replace monthdist = -5 if dist<-120&dist>=-150
replace monthdist = -6 if dist<-150&dist>=-180

*Monthly indicators m_1-m_12 for monthdist = -6,...,-1,1,...,6
*m_7 is the immediate post period (month)
local k = 0
foreach mth in -6 -5 -4 -3 -2 -1 1 2 3 4 5 6 {
	local ++k
	gen byte m_`k' = (monthdist==`mth') if !missing(monthdist)
	}

*Collapse data to individual level
*This will yield a dataset that contains counts of number of shootings in each quarterly or monthly bin around the interview date
*Respondent-level covariates are carried through with (mean).
*_ageg5yr, _llcpwt and int_date are written out in full because they are referenced by those names elsewhere in this section.
collapse (sum) t_* m_* (mean) _psu shot_per_month genhlth physhlth menthlth _ageg5yr educa stfip imonth iyear iday _llcpwt sex int_date drnk _rfsm exerany income2, by(id)

*Generate a measure of exposure to any police killing in the 4 quarters around BRFSS interview
*any_x is 1 when at least one killing falls in quarterly bin x, 0 otherwise
forvalues x = 1/8 {
	gen any_`x' = t_`x'
	recode any_`x' (1/max = 1)
	}

****ESTIMATES
*First model: number of killings in the immediate post quarter; second model: any killing in that quarter.
*Both absorb state-by-month, month-by-year, interview day, sex, age group and education effects,
*use _llcpwt as probability weights, and cluster standard errors by state.
xi: reghdfe menthlth t_5 [pw  = _llcpwt] , abs(stfip#imonth imonth#iyear iday sex _ageg5yr educa) cluster(stfip)
xi: reghdfe menthlth any_5 [pw  = _llcpwt] , abs(stfip#imonth imonth#iyear iday sex _ageg5yr educa) cluster(stfip)

/*
*Wild cluster bootstrap t p-values
set matsize 1200
xi: reg menthlth any_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa [pw  = _llcpwt] , cluster(stfip) 
xi: bootwildct any_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa  , numvars(1) bootreps(50)
xi: reg menthlth t_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa [pw  = _llcpwt] , cluster(stfip) 
xi: bootwildct t_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa  , numvars(1) bootreps(50)
*/





*****(2) Impact of police killings of ARMED BLACK AMERICANS on mental health among BLACK AMERICANS
***SETUP
*Steps in this section:
*  1. load the shootings file, compute state totals, and expand to one row per killing
*  2. pair every killing with every BRFSS respondent in the same state (joinby on stfip)
*  3. bin each killing-respondent pair by the number of days between interview and killing
*  4. collapse to one row per respondent, counting killings in each quarterly/monthly bin
*  5. regress menthlth on exposure in the immediate post-period bin (t_5 / any_5)
use "$working/black_armed_shootings.dta", clear

*State-level total of killings and average per month.
*This must be computed BEFORE expand: once each record is duplicated count times,
*total(count) would add every record's count repeatedly (sum of count squared) and
*overstate the total wherever count > 1.
bys stfip: egen total_shot = total(count)
gen shot_per_month = total_shot/45	/* NOTE(review): 45 is presumably the number of months covered by the shootings data - confirm */

*One row per killing
expand count

*histogram of shootings per month (one observation per killing, as before)
hist shot_per_month

*unmatched(master) keeps shooting records that have no respondent in the same state;
*respondents in states that do not appear in the shootings file are not retained.
joinby stfip using "$working/brfss_black_sample_June182018.dta", unmatched(master) _merge(_BRF)

*Generate time from shooting estimate for every shooting-individual pair in the data
*dist > 0 means the killing happened BEFORE the interview (respondent was exposed)
*Combine these into 3 month intervals
gen dist = int_date - shooting_date
gen timing = .

replace timing = 1 if dist>=0&dist<=90
replace timing = 2 if dist>90&dist<=180
replace timing = 3 if dist>180&dist<=270
*Upper bound is inclusive so that day 360 is binned, mirroring dist>=-360 in the pre-period
replace timing = 4 if dist>270&dist<=360

*Pre-period bins; strict lower-side inequalities leave no gap between adjacent bins
*(identical to the <=-91 / <=-181 / <=-271 form whenever dist is an integer number of days)
replace timing = -1 if dist<0&dist>=-90
replace timing = -2 if dist<-90&dist>=-180
replace timing = -3 if dist<-180&dist>=-270
replace timing = -4 if dist<-270&dist>=-360

*Quarterly indicators t_1-t_8 for timing = -4,...,-1,1,...,4
*negative numbers reflect a shooting date AFTER interview; t_5 is the immediate post period
*The mapping is written out explicitly (instead of tab, gen()) so that the numbering does not
*depend on which bins happen to be populated in the data.
local k = 0
foreach q in -4 -3 -2 -1 1 2 3 4 {
	local ++k
	gen byte t_`k' = (timing==`q') if !missing(timing)
	}

*Denote whether shooting occurred in any of +/- 6 monthly bins around interview date
gen monthdist = .
replace monthdist = 1 if dist>=0&dist<=30
replace monthdist = 2 if dist>30&dist<=60
replace monthdist = 3 if dist>60&dist<=90
replace monthdist = 4 if dist>90&dist<=120
replace monthdist = 5 if dist>120&dist<=150
replace monthdist = 6 if dist>150&dist<=180

replace monthdist = -1 if dist<0&dist>=-30
replace monthdist = -2 if dist<-30&dist>=-60
replace monthdist = -3 if dist<-60&dist>=-90
replace monthdist = -4 if dist<-90&dist>=-120
replace monthdist = -5 if dist<-120&dist>=-150
replace monthdist = -6 if dist<-150&dist>=-180

*Monthly indicators m_1-m_12 for monthdist = -6,...,-1,1,...,6
*m_7 is the immediate post period (month)
local k = 0
foreach mth in -6 -5 -4 -3 -2 -1 1 2 3 4 5 6 {
	local ++k
	gen byte m_`k' = (monthdist==`mth') if !missing(monthdist)
	}

*Collapse data to individual level
*This will yield a dataset that contains counts of number of shootings in each quarterly or monthly bin around the interview date
*Respondent-level covariates are carried through with (mean).
*_ageg5yr, _llcpwt and int_date are written out in full because they are referenced by those names elsewhere in this section.
collapse (sum) t_* m_* (mean) _psu shot_per_month genhlth physhlth menthlth _ageg5yr educa stfip imonth iyear iday _llcpwt sex int_date drnk _rfsm exerany income2, by(id)

*Generate a measure of exposure to any police killing in the 4 quarters around BRFSS interview
*any_x is 1 when at least one killing falls in quarterly bin x, 0 otherwise
forvalues x = 1/8 {
	gen any_`x' = t_`x'
	recode any_`x' (1/max = 1)
	}

****ESTIMATES
*First model: number of killings in the immediate post quarter; second model: any killing in that quarter.
*Both absorb state-by-month, month-by-year, interview day, sex, age group and education effects,
*use _llcpwt as probability weights, and cluster standard errors by state.
xi: reghdfe menthlth t_5 [pw  = _llcpwt] , abs(stfip#imonth imonth#iyear iday sex _ageg5yr educa) cluster(stfip)
xi: reghdfe menthlth any_5 [pw  = _llcpwt] , abs(stfip#imonth imonth#iyear iday sex _ageg5yr educa) cluster(stfip)

/*
*Wild cluster bootstrap t p-values
set matsize 1200
xi: reg menthlth any_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa [pw  = _llcpwt] , cluster(stfip) 
xi: bootwildct any_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa  , numvars(1) bootreps(50)
xi: reg menthlth t_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa [pw  = _llcpwt] , cluster(stfip) 
xi: bootwildct t_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa  , numvars(1) bootreps(50)
*/




*****(3) Impact of Killings of ARMED BLACK AMERICANS on mental health among WHITE AMERICANS
***SETUP
*Steps in this section:
*  1. load the shootings file, compute state totals, and expand to one row per killing
*  2. pair every killing with every BRFSS respondent in the same state (joinby on stfip)
*  3. bin each killing-respondent pair by the number of days between interview and killing
*  4. collapse to one row per respondent, counting killings in each quarterly/monthly bin
*  5. regress menthlth on exposure in the immediate post-period bin (t_5 / any_5)
use "$working/black_armed_shootings.dta", clear

*State-level total of killings and average per month.
*This must be computed BEFORE expand: once each record is duplicated count times,
*total(count) would add every record's count repeatedly (sum of count squared) and
*overstate the total wherever count > 1.
bys stfip: egen total_shot = total(count)
gen shot_per_month = total_shot/45	/* NOTE(review): 45 is presumably the number of months covered by the shootings data - confirm */

*One row per killing
expand count

*histogram of shootings per month (one observation per killing, as before)
hist shot_per_month

*unmatched(master) keeps shooting records that have no respondent in the same state;
*respondents in states that do not appear in the shootings file are not retained.
joinby stfip using "$working/brfss_white_sample_June182018.dta", unmatched(master) _merge(_BRF)

*Generate time from shooting estimate for every shooting-individual pair in the data
*dist > 0 means the killing happened BEFORE the interview (respondent was exposed)
*Combine these into 3 month intervals
gen dist = int_date - shooting_date
gen timing = .

replace timing = 1 if dist>=0&dist<=90
replace timing = 2 if dist>90&dist<=180
replace timing = 3 if dist>180&dist<=270
*Upper bound is inclusive so that day 360 is binned, mirroring dist>=-360 in the pre-period
replace timing = 4 if dist>270&dist<=360

*Pre-period bins; strict lower-side inequalities leave no gap between adjacent bins
*(identical to the <=-91 / <=-181 / <=-271 form whenever dist is an integer number of days)
replace timing = -1 if dist<0&dist>=-90
replace timing = -2 if dist<-90&dist>=-180
replace timing = -3 if dist<-180&dist>=-270
replace timing = -4 if dist<-270&dist>=-360

*Quarterly indicators t_1-t_8 for timing = -4,...,-1,1,...,4
*negative numbers reflect a shooting date AFTER interview; t_5 is the immediate post period
*The mapping is written out explicitly (instead of tab, gen()) so that the numbering does not
*depend on which bins happen to be populated in the data.
local k = 0
foreach q in -4 -3 -2 -1 1 2 3 4 {
	local ++k
	gen byte t_`k' = (timing==`q') if !missing(timing)
	}

*Denote whether shooting occurred in any of +/- 6 monthly bins around interview date
gen monthdist = .
replace monthdist = 1 if dist>=0&dist<=30
replace monthdist = 2 if dist>30&dist<=60
replace monthdist = 3 if dist>60&dist<=90
replace monthdist = 4 if dist>90&dist<=120
replace monthdist = 5 if dist>120&dist<=150
replace monthdist = 6 if dist>150&dist<=180

replace monthdist = -1 if dist<0&dist>=-30
replace monthdist = -2 if dist<-30&dist>=-60
replace monthdist = -3 if dist<-60&dist>=-90
replace monthdist = -4 if dist<-90&dist>=-120
replace monthdist = -5 if dist<-120&dist>=-150
replace monthdist = -6 if dist<-150&dist>=-180

*Monthly indicators m_1-m_12 for monthdist = -6,...,-1,1,...,6
*m_7 is the immediate post period (month)
local k = 0
foreach mth in -6 -5 -4 -3 -2 -1 1 2 3 4 5 6 {
	local ++k
	gen byte m_`k' = (monthdist==`mth') if !missing(monthdist)
	}

*Collapse data to individual level
*This will yield a dataset that contains counts of number of shootings in each quarterly or monthly bin around the interview date
*Respondent-level covariates are carried through with (mean).
*_ageg5yr, _llcpwt and int_date are written out in full because they are referenced by those names elsewhere in this section.
collapse (sum) t_* m_* (mean) _psu shot_per_month genhlth physhlth menthlth _ageg5yr educa stfip imonth iyear iday _llcpwt sex int_date drnk _rfsm exerany income2, by(id)

*Generate a measure of exposure to any police killing in the 4 quarters around BRFSS interview
*any_x is 1 when at least one killing falls in quarterly bin x, 0 otherwise
forvalues x = 1/8 {
	gen any_`x' = t_`x'
	recode any_`x' (1/max = 1)
	}

****ESTIMATES
*First model: number of killings in the immediate post quarter; second model: any killing in that quarter.
*Both absorb state-by-month, month-by-year, interview day, sex, age group and education effects,
*use _llcpwt as probability weights, and cluster standard errors by state.
xi: reghdfe menthlth t_5 [pw  = _llcpwt] , abs(stfip#imonth imonth#iyear iday sex _ageg5yr educa) cluster(stfip)
xi: reghdfe menthlth any_5 [pw  = _llcpwt] , abs(stfip#imonth imonth#iyear iday sex _ageg5yr educa) cluster(stfip)

/*
*Wild cluster bootstrap t p-values
set matsize 1200
xi: reg menthlth any_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa [pw  = _llcpwt] , cluster(stfip) 
xi: bootwildct any_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa  , numvars(1) bootreps(50)
xi: reg menthlth t_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa [pw  = _llcpwt] , cluster(stfip) 
xi: bootwildct t_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa  , numvars(1) bootreps(50)
*/




*****(4) Impact of Killings of UNARMED WHITE AMERICANS on mental health among BLACK AMERICANS
***SETUP
*Steps in this section:
*  1. load the shootings file, compute state totals, and expand to one row per killing
*  2. pair every killing with every BRFSS respondent in the same state (joinby on stfip)
*  3. bin each killing-respondent pair by the number of days between interview and killing
*  4. collapse to one row per respondent, counting killings in each quarterly/monthly bin
*  5. regress menthlth on exposure in the immediate post-period bin (t_5 / any_5)
use "$working/white_unarmed_shootings.dta", clear

*State-level total of killings and average per month.
*This must be computed BEFORE expand: once each record is duplicated count times,
*total(count) would add every record's count repeatedly (sum of count squared) and
*overstate the total wherever count > 1.
bys stfip: egen total_shot = total(count)
gen shot_per_month = total_shot/45	/* NOTE(review): 45 is presumably the number of months covered by the shootings data - confirm */

*One row per killing
expand count

*histogram of shootings per month (one observation per killing, as before)
hist shot_per_month

*unmatched(master) keeps shooting records that have no respondent in the same state;
*respondents in states that do not appear in the shootings file are not retained.
joinby stfip using "$working/brfss_black_sample_June182018.dta", unmatched(master) _merge(_BRF)

*Generate time from shooting estimate for every shooting-individual pair in the data
*dist > 0 means the killing happened BEFORE the interview (respondent was exposed)
*Combine these into 3 month intervals
gen dist = int_date - shooting_date
gen timing = .

replace timing = 1 if dist>=0&dist<=90
replace timing = 2 if dist>90&dist<=180
replace timing = 3 if dist>180&dist<=270
*Upper bound is inclusive so that day 360 is binned, mirroring dist>=-360 in the pre-period
replace timing = 4 if dist>270&dist<=360

*Pre-period bins; strict lower-side inequalities leave no gap between adjacent bins
*(identical to the <=-91 / <=-181 / <=-271 form whenever dist is an integer number of days)
replace timing = -1 if dist<0&dist>=-90
replace timing = -2 if dist<-90&dist>=-180
replace timing = -3 if dist<-180&dist>=-270
replace timing = -4 if dist<-270&dist>=-360

*Quarterly indicators t_1-t_8 for timing = -4,...,-1,1,...,4
*negative numbers reflect a shooting date AFTER interview; t_5 is the immediate post period
*The mapping is written out explicitly (instead of tab, gen()) so that the numbering does not
*depend on which bins happen to be populated in the data.
local k = 0
foreach q in -4 -3 -2 -1 1 2 3 4 {
	local ++k
	gen byte t_`k' = (timing==`q') if !missing(timing)
	}

*Denote whether shooting occurred in any of +/- 6 monthly bins around interview date
gen monthdist = .
replace monthdist = 1 if dist>=0&dist<=30
replace monthdist = 2 if dist>30&dist<=60
replace monthdist = 3 if dist>60&dist<=90
replace monthdist = 4 if dist>90&dist<=120
replace monthdist = 5 if dist>120&dist<=150
replace monthdist = 6 if dist>150&dist<=180

replace monthdist = -1 if dist<0&dist>=-30
replace monthdist = -2 if dist<-30&dist>=-60
replace monthdist = -3 if dist<-60&dist>=-90
replace monthdist = -4 if dist<-90&dist>=-120
replace monthdist = -5 if dist<-120&dist>=-150
replace monthdist = -6 if dist<-150&dist>=-180

*Monthly indicators m_1-m_12 for monthdist = -6,...,-1,1,...,6
*m_7 is the immediate post period (month)
local k = 0
foreach mth in -6 -5 -4 -3 -2 -1 1 2 3 4 5 6 {
	local ++k
	gen byte m_`k' = (monthdist==`mth') if !missing(monthdist)
	}

*Collapse data to individual level
*This will yield a dataset that contains counts of number of shootings in each quarterly or monthly bin around the interview date
*Respondent-level covariates are carried through with (mean).
*_ageg5yr, _llcpwt and int_date are written out in full because they are referenced by those names elsewhere in this section.
collapse (sum) t_* m_* (mean) _psu shot_per_month genhlth physhlth menthlth _ageg5yr educa stfip imonth iyear iday _llcpwt sex int_date drnk _rfsm exerany income2, by(id)

*Generate a measure of exposure to any police killing in the 4 quarters around BRFSS interview
*any_x is 1 when at least one killing falls in quarterly bin x, 0 otherwise
forvalues x = 1/8 {
	gen any_`x' = t_`x'
	recode any_`x' (1/max = 1)
	}

****ESTIMATES
*First model: number of killings in the immediate post quarter; second model: any killing in that quarter.
*Both absorb state-by-month, month-by-year, interview day, sex, age group and education effects,
*use _llcpwt as probability weights, and cluster standard errors by state.
xi: reghdfe menthlth t_5 [pw  = _llcpwt] , abs(stfip#imonth imonth#iyear iday sex _ageg5yr educa) cluster(stfip)
xi: reghdfe menthlth any_5 [pw  = _llcpwt] , abs(stfip#imonth imonth#iyear iday sex _ageg5yr educa) cluster(stfip)

/*
*Wild cluster bootstrap t p-values
set matsize 1200
xi: reg menthlth any_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa [pw  = _llcpwt] , cluster(stfip) 
xi: bootwildct any_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa  , numvars(1) bootreps(50)
xi: reg menthlth t_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa [pw  = _llcpwt] , cluster(stfip) 
xi: bootwildct t_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa  , numvars(1) bootreps(50)
*/




*****(5) Impact of police killings of UNARMED WHITE on mental health among WHITE Americans
***SETUP
*Steps in this section:
*  1. load the shootings file, compute state totals, and expand to one row per killing
*  2. pair every killing with every BRFSS respondent in the same state (joinby on stfip)
*  3. bin each killing-respondent pair by the number of days between interview and killing
*  4. collapse to one row per respondent, counting killings in each quarterly/monthly bin
*  5. regress menthlth on exposure in the immediate post-period bin (t_5 / any_5)
use "$working/white_unarmed_shootings.dta", clear

*State-level total of killings and average per month.
*This must be computed BEFORE expand: once each record is duplicated count times,
*total(count) would add every record's count repeatedly (sum of count squared) and
*overstate the total wherever count > 1.
bys stfip: egen total_shot = total(count)
gen shot_per_month = total_shot/45	/* NOTE(review): 45 is presumably the number of months covered by the shootings data - confirm */

*One row per killing
expand count

*histogram of shootings per month (one observation per killing, as before)
hist shot_per_month

*unmatched(master) keeps shooting records that have no respondent in the same state;
*respondents in states that do not appear in the shootings file are not retained.
joinby stfip using "$working/brfss_white_sample_June182018.dta", unmatched(master) _merge(_BRF)

*Generate time from shooting estimate for every shooting-individual pair in the data
*dist > 0 means the killing happened BEFORE the interview (respondent was exposed)
*Combine these into 3 month intervals
gen dist = int_date - shooting_date
gen timing = .

replace timing = 1 if dist>=0&dist<=90
replace timing = 2 if dist>90&dist<=180
replace timing = 3 if dist>180&dist<=270
*Upper bound is inclusive so that day 360 is binned, mirroring dist>=-360 in the pre-period
replace timing = 4 if dist>270&dist<=360

*Pre-period bins; strict lower-side inequalities leave no gap between adjacent bins
*(identical to the <=-91 / <=-181 / <=-271 form whenever dist is an integer number of days)
replace timing = -1 if dist<0&dist>=-90
replace timing = -2 if dist<-90&dist>=-180
replace timing = -3 if dist<-180&dist>=-270
replace timing = -4 if dist<-270&dist>=-360

*Quarterly indicators t_1-t_8 for timing = -4,...,-1,1,...,4
*negative numbers reflect a shooting date AFTER interview; t_5 is the immediate post period
*The mapping is written out explicitly (instead of tab, gen()) so that the numbering does not
*depend on which bins happen to be populated in the data.
local k = 0
foreach q in -4 -3 -2 -1 1 2 3 4 {
	local ++k
	gen byte t_`k' = (timing==`q') if !missing(timing)
	}

*Denote whether shooting occurred in any of +/- 6 monthly bins around interview date
gen monthdist = .
replace monthdist = 1 if dist>=0&dist<=30
replace monthdist = 2 if dist>30&dist<=60
replace monthdist = 3 if dist>60&dist<=90
replace monthdist = 4 if dist>90&dist<=120
replace monthdist = 5 if dist>120&dist<=150
replace monthdist = 6 if dist>150&dist<=180

replace monthdist = -1 if dist<0&dist>=-30
replace monthdist = -2 if dist<-30&dist>=-60
replace monthdist = -3 if dist<-60&dist>=-90
replace monthdist = -4 if dist<-90&dist>=-120
replace monthdist = -5 if dist<-120&dist>=-150
replace monthdist = -6 if dist<-150&dist>=-180

*Monthly indicators m_1-m_12 for monthdist = -6,...,-1,1,...,6
*m_7 is the immediate post period (month)
local k = 0
foreach mth in -6 -5 -4 -3 -2 -1 1 2 3 4 5 6 {
	local ++k
	gen byte m_`k' = (monthdist==`mth') if !missing(monthdist)
	}

*Collapse data to individual level
*This will yield a dataset that contains counts of number of shootings in each quarterly or monthly bin around the interview date
*Respondent-level covariates are carried through with (mean).
*_ageg5yr, _llcpwt and int_date are written out in full because they are referenced by those names elsewhere in this section.
collapse (sum) t_* m_* (mean) _psu shot_per_month genhlth physhlth menthlth _ageg5yr educa stfip imonth iyear iday _llcpwt sex int_date drnk _rfsm exerany income2, by(id)

*Generate a measure of exposure to any police killing in the 4 quarters around BRFSS interview
*any_x is 1 when at least one killing falls in quarterly bin x, 0 otherwise
forvalues x = 1/8 {
	gen any_`x' = t_`x'
	recode any_`x' (1/max = 1)
	}

****ESTIMATES
*First model: number of killings in the immediate post quarter; second model: any killing in that quarter.
*Both absorb state-by-month, month-by-year, interview day, sex, age group and education effects,
*use _llcpwt as probability weights, and cluster standard errors by state.
xi: reghdfe menthlth t_5 [pw  = _llcpwt] , abs(stfip#imonth imonth#iyear iday sex _ageg5yr educa) cluster(stfip)
xi: reghdfe menthlth any_5 [pw  = _llcpwt] , abs(stfip#imonth imonth#iyear iday sex _ageg5yr educa) cluster(stfip)

/*
*Wild cluster bootstrap t p-values
set matsize 1200
xi: reg menthlth any_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa [pw  = _llcpwt] , cluster(stfip) 
xi: bootwildct any_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa  , numvars(1) bootreps(50)
xi: reg menthlth t_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa [pw  = _llcpwt] , cluster(stfip) 
xi: bootwildct t_5 i.stfip*i.imonth i.iyear*i.imonth i.iday sex i._ageg5 i.educa  , numvars(1) bootreps(50)
*/

